// -*- C++ -*-
//===---------------------------- numeric ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCUDACXX_NUMERIC
#define _LIBCUDACXX_NUMERIC

/*
    numeric synopsis

namespace std
{

#ifndef __cuda_std__
template <class InputIterator, class T>
    T
    accumulate(InputIterator first, InputIterator last, T init);

template <class InputIterator, class T, class BinaryOperation>
    T
    accumulate(InputIterator first, InputIterator last, T init, BinaryOperation binary_op);

template<class InputIterator>
    typename iterator_traits<InputIterator>::value_type
    reduce(InputIterator first, InputIterator last);  // C++17

template<class InputIterator, class T>
    T
    reduce(InputIterator first, InputIterator last, T init);  // C++17

template<class InputIterator, class T, class BinaryOperation>
    T
    reduce(InputIterator first, InputIterator last, T init, BinaryOperation binary_op);  // C++17

template <class InputIterator1, class InputIterator2, class T>
    T
    inner_product(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init);

template <class InputIterator1, class InputIterator2, class T, class BinaryOperation1, class BinaryOperation2>
    T
    inner_product(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2,
                  T init, BinaryOperation1 binary_op1, BinaryOperation2 binary_op2);


template<class InputIterator1, class InputIterator2, class T>
    T
    transform_reduce(InputIterator1 first1, InputIterator1 last1,
                     InputIterator2 first2, T init);  // C++17

template<class InputIterator1, class InputIterator2, class T, class BinaryOperation1, class BinaryOperation2>
    T
    transform_reduce(InputIterator1 first1, InputIterator1 last1,
                     InputIterator2 first2, T init,
                     BinaryOperation1 binary_op1, BinaryOperation2 binary_op2);  // C++17

template<class InputIterator, class T, class BinaryOperation, class UnaryOperation>
    T
    transform_reduce(InputIterator first, InputIterator last, T init,
                     BinaryOperation binary_op, UnaryOperation unary_op);  // C++17

template <class InputIterator, class OutputIterator>
    OutputIterator
    partial_sum(InputIterator first, InputIterator last, OutputIterator result);

template <class InputIterator, class OutputIterator, class BinaryOperation>
    OutputIterator
    partial_sum(InputIterator first, InputIterator last, OutputIterator result, BinaryOperation binary_op);

template<class InputIterator, class OutputIterator, class T>
    OutputIterator
    exclusive_scan(InputIterator first, InputIterator last,
                   OutputIterator result, T init); // C++17

template<class InputIterator, class OutputIterator, class T, class BinaryOperation>
    OutputIterator
    exclusive_scan(InputIterator first, InputIterator last,
                   OutputIterator result, T init, BinaryOperation binary_op); // C++17

template<class InputIterator, class OutputIterator>
    OutputIterator
    inclusive_scan(InputIterator first, InputIterator last, OutputIterator result);  // C++17

template<class InputIterator, class OutputIterator, class BinaryOperation>
    OutputIterator
    inclusive_scan(InputIterator first, InputIterator last,
                   OutputIterator result, BinaryOperation binary_op);  // C++17

template<class InputIterator, class OutputIterator, class BinaryOperation, class T>
    OutputIterator
    inclusive_scan(InputIterator first, InputIterator last,
                   OutputIterator result, BinaryOperation binary_op, T init);  // C++17

template<class InputIterator, class OutputIterator, class T,
         class BinaryOperation, class UnaryOperation>
    OutputIterator
    transform_exclusive_scan(InputIterator first, InputIterator last,
                             OutputIterator result, T init,
                             BinaryOperation binary_op, UnaryOperation unary_op);  // C++17

template<class InputIterator, class OutputIterator,
         class BinaryOperation, class UnaryOperation>
    OutputIterator
    transform_inclusive_scan(InputIterator first, InputIterator last,
                             OutputIterator result,
                             BinaryOperation binary_op, UnaryOperation unary_op);  // C++17

template<class InputIterator, class OutputIterator,
         class BinaryOperation, class UnaryOperation, class T>
    OutputIterator
    transform_inclusive_scan(InputIterator first, InputIterator last,
                             OutputIterator result,
                             BinaryOperation binary_op, UnaryOperation unary_op,
                             T init);  // C++17

template <class InputIterator, class OutputIterator>
    OutputIterator
    adjacent_difference(InputIterator first, InputIterator last, OutputIterator result);

template <class InputIterator, class OutputIterator, class BinaryOperation>
    OutputIterator
    adjacent_difference(InputIterator first, InputIterator last, OutputIterator result, BinaryOperation binary_op);
#endif // __cuda_std__

template <class ForwardIterator, class T>
    void iota(ForwardIterator first, ForwardIterator last, T value);

#ifndef __cuda_std__
template <class M, class N>
    constexpr common_type_t<M,N> gcd(M m, N n);    // C++17

template <class M, class N>
    constexpr common_type_t<M,N> lcm(M m, N n);    // C++17
#endif // __cuda_std__

integer         midpoint(integer a, integer b);                  // C++20
pointer         midpoint(pointer a, pointer b);                  // C++20
floating_point  midpoint(floating_point a, floating_point b);    // C++20

}  // std

*/

#ifndef __cuda_std__
#include <__config>
#endif // __cuda_std__

#include "__assert" // all public C++ headers provide the assertion handler
#include "__iterator/iterator_traits.h"
#include "__utility/move.h"
#include "cmath" // for isnormal
#include "functional"
#include "limits" // for numeric_limits
#include "version"

#ifndef __cuda_std__
#include <__pragma_push>
#endif // __cuda_std__

#if defined(_LIBCUDACXX_USE_PRAGMA_GCC_SYSTEM_HEADER)
#pragma GCC system_header
#endif

_LIBCUDACXX_BEGIN_NAMESPACE_STD

#ifndef __cuda_std__
template <class _InputIterator, class _Tp>
inline _LIBCUDACXX_INLINE_VISIBILITY
_Tp
accumulate(_InputIterator __first, _InputIterator __last, _Tp __init)
{
    for (; __first != __last; ++__first)
        __init = __init + *__first;
    return __init;
}

template <class _InputIterator, class _Tp, class _BinaryOperation>
inline _LIBCUDACXX_INLINE_VISIBILITY
_Tp
accumulate(_InputIterator __first, _InputIterator __last, _Tp __init, _BinaryOperation __binary_op)
{
    for (; __first != __last; ++__first)
        __init = __binary_op(__init, *__first);
    return __init;
}

#if _LIBCUDACXX_STD_VER > 14
template <class _InputIterator, class _Tp, class _BinaryOp>
inline _LIBCUDACXX_INLINE_VISIBILITY
_Tp
reduce(_InputIterator __first, _InputIterator __last, _Tp __init, _BinaryOp __b)
{
    for (; __first != __last; ++__first)
        __init = __b(__init, *__first);
    return __init;
}

template <class _InputIterator, class _Tp>
inline _LIBCUDACXX_INLINE_VISIBILITY
_Tp
reduce(_InputIterator __first, _InputIterator __last, _Tp __init)
{
    return _CUDA_VSTD::reduce(__first, __last, __init, _CUDA_VSTD::plus<>());
}

template <class _InputIterator>
inline _LIBCUDACXX_INLINE_VISIBILITY
typename iterator_traits<_InputIterator>::value_type
reduce(_InputIterator __first, _InputIterator __last)
{
    return _CUDA_VSTD::reduce(__first, __last,
       typename iterator_traits<_InputIterator>::value_type{});
}
#endif

template <class _InputIterator1, class _InputIterator2, class _Tp>
inline _LIBCUDACXX_INLINE_VISIBILITY
_Tp
inner_product(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _Tp __init)
{
    for (; __first1 != __last1; ++__first1, (void) ++__first2)
        __init = __init + *__first1 * *__first2;
    return __init;
}

template <class _InputIterator1, class _InputIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2>
inline _LIBCUDACXX_INLINE_VISIBILITY
_Tp
inner_product(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2,
              _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2)
{
    for (; __first1 != __last1; ++__first1, (void) ++__first2)
        __init = __binary_op1(__init, __binary_op2(*__first1, *__first2));
    return __init;
}

#if _LIBCUDACXX_STD_VER > 14
template <class _InputIterator, class _Tp, class _BinaryOp, class _UnaryOp>
inline _LIBCUDACXX_INLINE_VISIBILITY
_Tp
transform_reduce(_InputIterator __first, _InputIterator __last,
           _Tp __init,  _BinaryOp __b, _UnaryOp __u)
{
    for (; __first != __last; ++__first)
        __init = __b(__init, __u(*__first));
    return __init;
}

template <class _InputIterator1, class _InputIterator2,
          class _Tp, class _BinaryOp1, class _BinaryOp2>
inline _LIBCUDACXX_INLINE_VISIBILITY
_Tp
transform_reduce(_InputIterator1 __first1, _InputIterator1 __last1,
                 _InputIterator2 __first2, _Tp __init,  _BinaryOp1 __b1, _BinaryOp2 __b2)
{
    for (; __first1 != __last1; ++__first1, (void) ++__first2)
        __init = __b1(__init, __b2(*__first1, *__first2));
    return __init;
}

template <class _InputIterator1, class _InputIterator2, class _Tp>
inline _LIBCUDACXX_INLINE_VISIBILITY
_Tp
transform_reduce(_InputIterator1 __first1, _InputIterator1 __last1,
                 _InputIterator2 __first2, _Tp __init)
{
    return _CUDA_VSTD::transform_reduce(__first1, __last1, __first2, _CUDA_VSTD::move(__init),
                                   _CUDA_VSTD::plus<>(), _CUDA_VSTD::multiplies<>());
}
#endif

template <class _InputIterator, class _OutputIterator>
inline _LIBCUDACXX_INLINE_VISIBILITY
_OutputIterator
partial_sum(_InputIterator __first, _InputIterator __last, _OutputIterator __result)
{
    if (__first != __last)
    {
        typename iterator_traits<_InputIterator>::value_type __t(*__first);
        *__result = __t;
        for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result)
        {
            __t = __t + *__first;
            *__result = __t;
        }
    }
    return __result;
}

template <class _InputIterator, class _OutputIterator, class _BinaryOperation>
inline _LIBCUDACXX_INLINE_VISIBILITY
_OutputIterator
partial_sum(_InputIterator __first, _InputIterator __last, _OutputIterator __result,
              _BinaryOperation __binary_op)
{
    if (__first != __last)
    {
        typename iterator_traits<_InputIterator>::value_type __t(*__first);
        *__result = __t;
        for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result)
        {
            __t = __binary_op(__t, *__first);
            *__result = __t;
        }
    }
    return __result;
}

#if _LIBCUDACXX_STD_VER > 14
template <class _InputIterator, class _OutputIterator, class _Tp, class _BinaryOp>
inline _LIBCUDACXX_INLINE_VISIBILITY
_OutputIterator
exclusive_scan(_InputIterator __first, _InputIterator __last,
               _OutputIterator __result, _Tp __init, _BinaryOp __b)
{
    if (__first != __last)
    {
        _Tp __saved = __init;
        do
        {
            __init = __b(__init, *__first);
            *__result = __saved;
            __saved = __init;
            ++__result;
        } while (++__first != __last);
    }
    return __result;
}

template <class _InputIterator, class _OutputIterator, class _Tp>
inline _LIBCUDACXX_INLINE_VISIBILITY
_OutputIterator
exclusive_scan(_InputIterator __first, _InputIterator __last,
               _OutputIterator __result, _Tp __init)
{
    return _CUDA_VSTD::exclusive_scan(__first, __last, __result, __init, _CUDA_VSTD::plus<>());
}

template <class _InputIterator, class _OutputIterator, class _BinaryOp, class _Tp>
_OutputIterator inclusive_scan(_InputIterator __first, _InputIterator __last,
                               _OutputIterator __result, _BinaryOp __b,  _Tp __init)
{
    for (; __first != __last; ++__first, (void) ++__result) {
        __init = __b(__init, *__first);
        *__result = __init;
        }
    return __result;
}

template <class _InputIterator, class _OutputIterator, class _BinaryOp>
_OutputIterator inclusive_scan(_InputIterator __first, _InputIterator __last,
                               _OutputIterator __result, _BinaryOp __b)
{
    if (__first != __last) {
        typename std::iterator_traits<_InputIterator>::value_type __init = *__first;
        *__result++ = __init;
        if (++__first != __last)
            return _CUDA_VSTD::inclusive_scan(__first, __last, __result, __b, __init);
        }

    return __result;
}

template <class _InputIterator, class _OutputIterator>
_OutputIterator inclusive_scan(_InputIterator __first, _InputIterator __last,
                               _OutputIterator __result)
{
    return _CUDA_VSTD::inclusive_scan(__first, __last, __result, std::plus<>());
}

template <class _InputIterator, class _OutputIterator, class _Tp,
          class _BinaryOp, class _UnaryOp>
inline _LIBCUDACXX_INLINE_VISIBILITY
_OutputIterator
transform_exclusive_scan(_InputIterator __first, _InputIterator __last,
                           _OutputIterator __result, _Tp __init,
                           _BinaryOp __b, _UnaryOp __u)
{
    if (__first != __last)
    {
        _Tp __saved = __init;
        do
        {
            __init = __b(__init, __u(*__first));
            *__result = __saved;
            __saved = __init;
            ++__result;
        } while (++__first != __last);
    }
    return __result;
}

template <class _InputIterator, class _OutputIterator, class _Tp, class _BinaryOp, class _UnaryOp>
_OutputIterator transform_inclusive_scan(_InputIterator __first, _InputIterator __last,
                           _OutputIterator __result, _BinaryOp __b, _UnaryOp __u, _Tp __init)
{
    for (; __first != __last; ++__first, (void) ++__result) {
        __init = __b(__init, __u(*__first));
        *__result = __init;
        }

    return __result;
}

template <class _InputIterator, class _OutputIterator, class _BinaryOp, class _UnaryOp>
_OutputIterator transform_inclusive_scan(_InputIterator __first, _InputIterator __last,
                               _OutputIterator __result, _BinaryOp __b, _UnaryOp __u)
{
    if (__first != __last) {
        typename std::iterator_traits<_InputIterator>::value_type __init = __u(*__first);
        *__result++ = __init;
        if (++__first != __last)
            return _CUDA_VSTD::transform_inclusive_scan(__first, __last, __result, __b, __u, __init);
        }

    return __result;
}
#endif

template <class _InputIterator, class _OutputIterator>
inline _LIBCUDACXX_INLINE_VISIBILITY
_OutputIterator
adjacent_difference(_InputIterator __first, _InputIterator __last, _OutputIterator __result)
{
    if (__first != __last)
    {
        typename iterator_traits<_InputIterator>::value_type __t1(*__first);
        *__result = __t1;
        for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result)
        {
            typename iterator_traits<_InputIterator>::value_type __t2(*__first);
            *__result = __t2 - __t1;
            __t1 = _CUDA_VSTD::move(__t2);
        }
    }
    return __result;
}

template <class _InputIterator, class _OutputIterator, class _BinaryOperation>
inline _LIBCUDACXX_INLINE_VISIBILITY
_OutputIterator
adjacent_difference(_InputIterator __first, _InputIterator __last, _OutputIterator __result,
                      _BinaryOperation __binary_op)
{
    if (__first != __last)
    {
        typename iterator_traits<_InputIterator>::value_type __t1(*__first);
        *__result = __t1;
        for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result)
        {
            typename iterator_traits<_InputIterator>::value_type __t2(*__first);
            *__result = __binary_op(__t2, __t1);
            __t1 = _CUDA_VSTD::move(__t2);
        }
    }
    return __result;
}
#endif // __cuda_std__

template <class _ForwardIterator, class _Tp>
inline _LIBCUDACXX_INLINE_VISIBILITY
void
iota(_ForwardIterator __first, _ForwardIterator __last, _Tp __value_)
{
    for (; __first != __last; ++__first, (void) ++__value_)
        *__first = __value_;
}


#ifndef __cuda_std__
#if _LIBCUDACXX_STD_VER > 14
template <typename _Result, typename _Source, bool _IsSigned = is_signed<_Source>::value> struct __ct_abs;

template <typename _Result, typename _Source>
struct __ct_abs<_Result, _Source, true> {
    _LIBCUDACXX_CONSTEXPR _LIBCUDACXX_INLINE_VISIBILITY
    _Result operator()(_Source __t) const noexcept
    {
    if (__t >= 0) return __t;
    if (__t == numeric_limits<_Source>::min()) return -static_cast<_Result>(__t);
    return -__t;
    }
};

template <typename _Result, typename _Source>
struct __ct_abs<_Result, _Source, false> {
    _LIBCUDACXX_CONSTEXPR _LIBCUDACXX_INLINE_VISIBILITY
    _Result operator()(_Source __t) const noexcept { return __t; }
};


template<class _Tp>
_LIBCUDACXX_CONSTEXPR _LIBCUDACXX_HIDDEN
_Tp __gcd(_Tp __m, _Tp __n)
{
    static_assert((!is_signed<_Tp>::value), "");
    return __n == 0 ? __m : _CUDA_VSTD::__gcd<_Tp>(__n, __m % __n);
}


template<class _Tp, class _Up>
_LIBCUDACXX_CONSTEXPR _LIBCUDACXX_INLINE_VISIBILITY
common_type_t<_Tp,_Up>
gcd(_Tp __m, _Up __n)
{
    static_assert((is_integral<_Tp>::value && is_integral<_Up>::value), "Arguments to gcd must be integer types");
    static_assert((!is_same<__remove_cv_t<_Tp>, bool>::value), "First argument to gcd cannot be bool" );
    static_assert((!is_same<__remove_cv_t<_Up>, bool>::value), "Second argument to gcd cannot be bool" );
    using _Rp = common_type_t<_Tp,_Up>;
    using _Wp = make_unsigned_t<_Rp>;
    return static_cast<_Rp>(_CUDA_VSTD::__gcd(
        static_cast<_Wp>(__ct_abs<_Rp, _Tp>()(__m)),
        static_cast<_Wp>(__ct_abs<_Rp, _Up>()(__n))));
}

template<class _Tp, class _Up>
_LIBCUDACXX_CONSTEXPR _LIBCUDACXX_INLINE_VISIBILITY
common_type_t<_Tp,_Up>
lcm(_Tp __m, _Up __n)
{
    static_assert((is_integral<_Tp>::value && is_integral<_Up>::value), "Arguments to lcm must be integer types");
    static_assert((!is_same<__remove_cv_t<_Tp>, bool>::value), "First argument to lcm cannot be bool" );
    static_assert((!is_same<__remove_cv_t<_Up>, bool>::value), "Second argument to lcm cannot be bool" );
    if (__m == 0 || __n == 0)
        return 0;

    using _Rp = common_type_t<_Tp,_Up>;
    _Rp __val1 = __ct_abs<_Rp, _Tp>()(__m) / _CUDA_VSTD::gcd(__m, __n);
    _Rp __val2 = __ct_abs<_Rp, _Up>()(__n);
    _LIBCUDACXX_ASSERT((numeric_limits<_Rp>::max() / __val1 > __val2), "Overflow in lcm");
    return __val1 * __val2;
}

#endif /* _LIBCUDACXX_STD_VER > 14 */

#if _LIBCUDACXX_STD_VER > 17
template <class _Tp>
_LIBCUDACXX_INLINE_VISIBILITY constexpr
enable_if_t<is_integral_v<_Tp> && !is_same_v<bool, _Tp> && !is_null_pointer_v<_Tp>, _Tp>
midpoint(_Tp __a, _Tp __b) noexcept
_LIBCUDACXX_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
{
    using _Up = std::make_unsigned_t<_Tp>;

    int __sign = 1;
    _Up __m = __a;
    _Up __M = __b;
    if (__a > __b)
    {
        __sign = -1;
        __m = __b;
        __M = __a;
    }
     return __a + __sign * _Tp(_Up(__M-__m) >> 1);
}


template <class _TPtr>
_LIBCUDACXX_INLINE_VISIBILITY constexpr
enable_if_t<is_pointer_v<_TPtr>
             && is_object_v<remove_pointer_t<_TPtr>>
             && ! is_void_v<remove_pointer_t<_TPtr>>
             && (sizeof(remove_pointer_t<_TPtr>) > 0), _TPtr>
midpoint(_TPtr __a, _TPtr __b) noexcept
{
    return __a + _CUDA_VSTD::midpoint(ptrdiff_t(0), __b - __a);
}


template <typename _Tp>
constexpr int __sign(_Tp __val) {
    return (_Tp(0) < __val) - (__val < _Tp(0));
}

template <typename _Fp>
constexpr _Fp __fp_abs(_Fp __f) { return __f >= 0 ? __f : -__f; }

template <class _Fp>
_LIBCUDACXX_INLINE_VISIBILITY constexpr
enable_if_t<is_floating_point_v<_Fp>, _Fp>
midpoint(_Fp __a, _Fp __b) noexcept
{
	constexpr _Fp __lo = numeric_limits<_Fp>::min()*2;
	constexpr _Fp __hi = numeric_limits<_Fp>::max()/2;
    return __fp_abs(__a) <= __hi && __fp_abs(__b) <= __hi ?  // typical case: overflow is impossible
      (__a + __b)/2 :                                        // always correctly rounded
      __fp_abs(__a) < __lo ? __a + __b/2 :                   // not safe to halve a
      __fp_abs(__a) < __lo ? __a/2 + __b :                   // not safe to halve b
      __a/2 + __b/2;                                         // otherwise correctly rounded
}

#endif // _LIBCUDACXX_STD_VER > 17
#endif // __cuda_std__

_LIBCUDACXX_END_NAMESPACE_STD

#if defined(_LIBCUDACXX_HAS_PARALLEL_ALGORITHMS) && _LIBCUDACXX_STD_VER >= 17
#   include <__pstl_numeric>
#endif

#ifndef __cuda_std__
#include <__pragma_pop>
#endif //__cuda_std__

#endif  // _LIBCUDACXX_NUMERIC
